부동산 문제 풀이

- 1단계 데이터 전처리

- 2단계 탐색적 자료 분석

- 2-1단계 시계열 분석

- 2-2단계 맵지도 분석

- 3단계 기계학습 예측 (TBD)

- 4단계 외부데이터 조인 (TBD)

#윈도우 10 64bit, R 3.4.2 버전
#설치 라이브러리, Rtools 설치 필요 (관리자 권한으로 설치)
#install.packages("tidyverse")
#install.packages("readxl")
#install.packages('DT')
#install.packages('glue')
#install.packages('glue')
#install.packages("htmlwidgets") 
#install.packages("plotly") 
#install.packages('ggmap')
#install.packages("leaflet")
#install.packages("leaflet")
#install.packages("leaflet.minicharts")
library(tidyverse)
## -- Attaching packages --------------------- tidyverse 1.2.1 --
## √ ggplot2 2.2.1.9000     √ purrr   0.2.4     
## √ tibble  1.3.4          √ dplyr   0.7.4     
## √ tidyr   0.7.2          √ stringr 1.2.0     
## √ readr   1.1.1          √ forcats 0.2.0
## -- Conflicts ------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readxl)
library(DT)
library(htmlwidgets)
library(sp)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggmap)
## 
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
## 
##     wind
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(leaflet)
library(leaflet.minicharts)
library(htmltools)

1단계 데이터 전처리 (1)

setwd("C:/budong/data")
files <- list.files(pattern=".xls")
DF <- NULL
files
##  [1] "2010년_01월_전국_실거래가_아파트(매매).xls"
##  [2] "2010년_02월_전국_실거래가_아파트(매매).xls"
##  [3] "2010년_03월_전국_실거래가_아파트(매매).xls"
##  [4] "2010년_04월_전국_실거래가_아파트(매매).xls"
##  [5] "2010년_05월_전국_실거래가_아파트(매매).xls"
##  [6] "2010년_06월_전국_실거래가_아파트(매매).xls"
##  [7] "2010년_07월_전국_실거래가_아파트(매매).xls"
##  [8] "2010년_08월_전국_실거래가_아파트(매매).xls"
##  [9] "2010년_09월_전국_실거래가_아파트(매매).xls"
## [10] "2010년_10월_전국_실거래가_아파트(매매).xls"
## [11] "2010년_11월_전국_실거래가_아파트(매매).xls"
## [12] "2010년_12월_전국_실거래가_아파트(매매).xls"
## [13] "2011년_01월_전국_실거래가_아파트(매매).xls"
## [14] "2011년_02월_전국_실거래가_아파트(매매).xls"
## [15] "2011년_03월_전국_실거래가_아파트(매매).xls"
## [16] "2011년_04월_전국_실거래가_아파트(매매).xls"
## [17] "2011년_05월_전국_실거래가_아파트(매매).xls"
## [18] "2011년_06월_전국_실거래가_아파트(매매).xls"
## [19] "2011년_07월_전국_실거래가_아파트(매매).xls"
## [20] "2011년_08월_전국_실거래가_아파트(매매).xls"
## [21] "2011년_09월_전국_실거래가_아파트(매매).xls"
## [22] "2011년_10월_전국_실거래가_아파트(매매).xls"
## [23] "2011년_11월_전국_실거래가_아파트(매매).xls"
## [24] "2011년_12월_전국_실거래가_아파트(매매).xls"
## [25] "2012년_01월_전국_실거래가_아파트(매매).xls"
## [26] "2012년_02월_전국_실거래가_아파트(매매).xls"
## [27] "2012년_03월_전국_실거래가_아파트(매매).xls"
## [28] "2012년_04월_전국_실거래가_아파트(매매).xls"
## [29] "2012년_05월_전국_실거래가_아파트(매매).xls"
## [30] "2012년_06월_전국_실거래가_아파트(매매).xls"
## [31] "2012년_07월_전국_실거래가_아파트(매매).xls"
## [32] "2012년_08월_전국_실거래가_아파트(매매).xls"
## [33] "2012년_09월_전국_실거래가_아파트(매매).xls"
## [34] "2012년_10월_전국_실거래가_아파트(매매).xls"
## [35] "2012년_11월_전국_실거래가_아파트(매매).xls"
## [36] "2012년_12월_전국_실거래가_아파트(매매).xls"
## [37] "2013년_01월_전국_실거래가_아파트(매매).xls"
## [38] "2013년_02월_전국_실거래가_아파트(매매).xls"
## [39] "2013년_03월_전국_실거래가_아파트(매매).xls"
## [40] "2013년_04월_전국_실거래가_아파트(매매).xls"
## [41] "2013년_05월_전국_실거래가_아파트(매매).xls"
## [42] "2013년_06월_전국_실거래가_아파트(매매).xls"
## [43] "2013년_07월_전국_실거래가_아파트(매매).xls"
## [44] "2013년_08월_전국_실거래가_아파트(매매).xls"
## [45] "2013년_09월_전국_실거래가_아파트(매매).xls"
## [46] "2013년_10월_전국_실거래가_아파트(매매).xls"
## [47] "2013년_11월_전국_실거래가_아파트(매매).xls"
## [48] "2013년_12월_전국_실거래가_아파트(매매).xls"
## [49] "2014년_01월_전국_실거래가_아파트(매매).xls"
## [50] "2014년_02월_전국_실거래가_아파트(매매).xls"
## [51] "2014년_03월_전국_실거래가_아파트(매매).xls"
## [52] "2014년_04월_전국_실거래가_아파트(매매).xls"
## [53] "2014년_05월_전국_실거래가_아파트(매매).xls"
## [54] "2014년_06월_전국_실거래가_아파트(매매).xls"
## [55] "2014년_07월_전국_실거래가_아파트(매매).xls"
## [56] "2014년_08월_전국_실거래가_아파트(매매).xls"
## [57] "2014년_09월_전국_실거래가_아파트(매매).xls"
## [58] "2014년_10월_전국_실거래가_아파트(매매).xls"
## [59] "2014년_11월_전국_실거래가_아파트(매매).xls"
## [60] "2014년_12월_전국_실거래가_아파트(매매).xls"
## [61] "2015년_01월_전국_실거래가_아파트(매매).xls"
## [62] "2015년_02월_전국_실거래가_아파트(매매).xls"
## [63] "2015년_03월_전국_실거래가_아파트(매매).xls"
## [64] "2015년_04월_전국_실거래가_아파트(매매).xls"
## [65] "2015년_05월_전국_실거래가_아파트(매매).xls"
## [66] "2015년_06월_전국_실거래가_아파트(매매).xls"
## [67] "2015년_07월_전국_실거래가_아파트(매매).xls"
## [68] "2015년_08월_전국_실거래가_아파트(매매).xls"
## [69] "2015년_09월_전국_실거래가_아파트(매매).xls"
## [70] "2015년_10월_전국_실거래가_아파트(매매).xls"
## [71] "2015년_11월_전국_실거래가_아파트(매매).xls"
## [72] "2015년_12월_전국_실거래가_아파트(매매).xls"
## [73] "2016년_01월_전국_실거래가_아파트(매매).xls"
## [74] "2016년_02월_전국_실거래가_아파트(매매).xls"
## [75] "2016년_03월_전국_실거래가_아파트(매매).xls"
## [76] "2016년_04월_전국_실거래가_아파트(매매).xls"
## [77] "2016년_05월_전국_실거래가_아파트(매매).xls"
## [78] "2016년_06월_전국_실거래가_아파트(매매).xls"
## [79] "2016년_07월_전국_실거래가_아파트(매매).xls"
## [80] "2016년_08월_전국_실거래가_아파트(매매).xls"
## [81] "2016년_09월_전국_실거래가_아파트(매매).xls"
## [82] "2016년_10월_전국_실거래가_아파트(매매).xls"
## [83] "2016년_11월_전국_실거래가_아파트(매매).xls"
## [84] "2016년_12월_전국_실거래가_아파트(매매).xls"
for (f in files) {
  year_month <- paste0(substr(f, 1, 4), substr(f, 7, 8))
  dat <- read_excel(f, skip=7, sheet = "서울특별시")
  dat <- cbind(dat, contractyear = year_month)
  DF <- rbind(DF, dat)
  cat('------', dim(DF), '------', '\t')
}
## ------ 4859 10 ------    ------ 8649 10 ------   ------ 12612 10 ------  ------ 15221 10 ------  ------ 17342 10 ------  ------ 19485 10 ------  ------ 21669 10 ------  ------ 23976 10 ------  ------ 26771 10 ------  ------ 31640 10 ------  ------ 37790 10 ------  ------ 44159 10 ------  ------ 51301 10 ------  ------ 57300 10 ------  ------ 62687 10 ------  ------ 66698 10 ------  ------ 70495 10 ------  ------ 74153 10 ------  ------ 78436 10 ------  ------ 83109 10 ------  ------ 87272 10 ------  ------ 91276 10 ------  ------ 94774 10 ------  ------ 98443 10 ------  ------ 101191 10 ------     ------ 105095 10 ------     ------ 109147 10 ------     ------ 112534 10 ------     ------ 115974 10 ------     ------ 118516 10 ------     ------ 120643 10 ------     ------ 122825 10 ------     ------ 126160 10 ------     ------ 131004 10 ------     ------ 135297 10 ------     ------ 139012 10 ------     ------ 142416 10 ------     ------ 146829 10 ------     ------ 153356 10 ------     ------ 160022 10 ------     ------ 166330 10 ------     ------ 171424 10 ------     ------ 174733 10 ------     ------ 179995 10 ------     ------ 187224 10 ------     ------ 194981 10 ------     ------ 200239 10 ------     ------ 206713 10 ------     ------ 213923 10 ------     ------ 222756 10 ------     ------ 230763 10 ------     ------ 236177 10 ------     ------ 241011 10 ------     ------ 246283 10 ------     ------ 252740 10 ------     ------ 261651 10 ------     ------ 271418 10 ------     ------ 279818 10 ------     ------ 285765 10 ------     ------ 292031 10 ------     ------ 301565 10 ------     ------ 311339 10 ------     ------ 326796 10 ------     ------ 338647 10 ------     ------ 348796 10 ------     ------ 359565 10 ------     ------ 370585 10 ------     ------ 379313 10 ------     ------ 389145 10 ------     ------ 399764 10 ------     ------ 406975 10 ------     ------ 411867 10 ------     ------ 416847 10 ------     ------ 422167 10 ------     ------ 430980 10 ------     ------ 441360 10 ------     ------ 452218 10 ------     ------ 465511 10 ------     ------ 477350 10 ------     ------ 488377 10 ------     ------ 499717 10 ------     ------ 511932 10 ------     ------ 517348 10 ------     ------ 519346 10 ------     
setwd("C:/budong/data/2017")
files <- list.files(pattern=".xlsx")
DF2 <- NULL
for (f in files) {
  dat2 <- read_excel(f)
  DF2 <- rbind(DF2, dat2)
  cat('------', dim(DF2), '------', '\t')
}
## ------ 3743 12 ------    ------ 9485 12 ------   ------ 18094 12 ------  ------ 27499 12 ------  ------ 42306 12 ------  ------ 55181 12 ------  ------ 70153 12 ------  ------ 75280 12 ------  ------ 81932 12 ------  ------ 84602 12 ------  
dim(DF)
## [1] 519346     10
dim(DF2)
## [1] 84602    12
colnames(DF) <- c('municipality', 'address', 'complex', 'exclusivearea', 'contractdate', 'transactionamount', 'floors', 'construction', 'roadname', 'contractyear')

colnames(DF2) <- c('municipality', 'address', 'home', 'abundance', 'complex', 'exclusivearea', 'contractyear', 'contractdate', 'transactionamount', 'floors', 'construction', 'roadname')

df_colnames <- colnames(DF)
DF_all <- rbind(DF, DF2[,df_colnames])
dim(DF_all)
## [1] 603948     10
#save(DF_all, file='DF_all.Rdata')
#write.csv(DF_all, file='DF_all.csv', row.names=FALSE)

1단계 데이터 전처리 (2)

glimpse(DF_all)
## Observations: 603,948
## Variables: 10
## $ municipality      <chr> "서울특별시 강남구 개포동", "서울특별시 강남구 개포동", "서울특별시 강남구...
## $ address           <chr> "658-1", "141", "141", "141", "141", "141", ...
## $ complex           <chr> "개포6차우성아파트1동~8동", "개포주공 1단지", "개포주공 1단지", "개...
## $ exclusivearea     <chr> "54.98", "35.44", "35.64", "35.64", "41.98",...
## $ contractdate      <chr> "21~31", "11~20", "1~10", "11~20", "11~20", ...
## $ transactionamount <chr> "66,000", "75,000", "72,400", "75,000", "83,...
## $ floors            <chr> "3", "1", "5", "1", "1", "5", "4", "4", "1",...
## $ construction      <chr> "1987", "1982", "1982", "1982", "1982", "198...
## $ roadname          <chr> "언주로", "개포로", "개포로", "개포로", "개포로", "개포로", "개...
## $ contractyear      <fctr> 201001, 201001, 201001, 201001, 201001, 201...
DF_all <- DF_all %>%  mutate(
    location_1 = sapply(strsplit(DF_all$municipality, ' '), "[", 1),
    location_2 = sapply(strsplit(DF_all$municipality, ' '), "[", 2),
    location_3 = sapply(strsplit(DF_all$municipality, ' '), "[", 3),
      exclusivearea = as.numeric(exclusivearea),
 exclusivearea_section = case_when(exclusivearea < 67 ~ '0~66',
                             exclusivearea >= 67 & exclusivearea < 100 ~ '67~100',
                             exclusivearea >= 100 & exclusivearea < 132 ~ '100~131',
                             exclusivearea >= 132 & exclusivearea < 165 ~ '132~165',
                             exclusivearea >= 165 & exclusivearea < 198 ~ '165~197',
                             exclusivearea >= 198 & exclusivearea < 330 ~ '198~329',
                             exclusivearea >= 330 ~ '330이상'),
    transactionamount = as.numeric(gsub(',', '', transactionamount)),
      floors = as.integer(floors),
    construction = as.integer(construction),
    roadname = as.factor(roadname),
      contractyear = ymd(paste0(contractyear,ifelse(str_sub(contractdate, 1, 2) == '1~', '01', str_sub(contractdate, 1, 2))))
)

2단계 탐색적 자료 분석

2-1단계 시계열 분석 (1)

  • exclusivearea 별 매매가 분포 확인
exclusivearea_price <- DF_all %>% filter(location_1 == '서울특별시') %>%
    group_by(exclusivearea_section) %>%
    summarise(
        count = n(),
        price_mean = mean(transactionamount, na.rm=T),
        price_25 = quantile(transactionamount, probs= 0.25, na.rm=T),
        price_50 = quantile(transactionamount, probs= 0.50, na.rm=T),
        price_75 = quantile(transactionamount, probs= 0.75, na.rm=T),
        price_stdev = sd(transactionamount, na.rm=T)
)
datatable(exclusivearea_price)

2단계 탐색적 자료 분석

2-1단계 시계열 분석 (2)

  • exclusivearea 별 매매가 분포 확인
price <- DF_all %>% filter(location_1 == '서울특별시') %>%
    group_by(contractyear , location_2) %>%
    summarise(
        count = n(),
        price_mean = mean(transactionamount, na.rm=T),
        price_25 = quantile(transactionamount, probs= 0.25, na.rm=T),
        price_50 = quantile(transactionamount, probs= 0.50, na.rm=T),
        price_75 = quantile(transactionamount, probs= 0.75, na.rm=T),
        price_stdev = sd(transactionamount, na.rm=T)
)

datatable(price)

2단계 탐색적 자료 분석

2-1단계 시계열 분석 (3)

  • 구별 거래 매매가 중앙값 확인
ggplotly(
price %>% filter (count >= 100) %>%
ggplot(aes(contractyear , price_50, colour=location_2, group=location_2, fill=location_2)) + geom_line(alpha=1, size=2) +
  xlab("") + ylab("가격 중앙값") +  
  theme_bw() +  ggtitle("거래가격 중앙값(만원 단위)") +  
  theme(plot.title = element_text(size=24,face="bold")) +
  theme(axis.text=element_text(size=14),  axis.title=element_text(size=24,face="bold")) +  
  theme(strip.text.y = element_text(colour = "black", face='bold', angle = 45, size = 20,
                                    hjust = 0.5, vjust = 0.5)) +
  theme(legend.text = element_text(colour="black", size = 7)))

2단계 탐색적 자료 분석

2-1단계 시계열 분석 (3)

  • 0~66 전용 면젹, 구별 transactionamount 중앙값 확인
price2 <- DF_all %>% filter(location_1 == '서울특별시' & exclusivearea_section == '0~66' ) %>%
    group_by(contractyear , location_2) %>%
    summarise(
        count = n(),
        price_mean = mean(transactionamount, na.rm=T),
        price_25 = quantile(transactionamount, probs= 0.25, na.rm=T),
        price_50 = quantile(transactionamount, probs= 0.50, na.rm=T),
        price_75 = quantile(transactionamount, probs= 0.75, na.rm=T),
        price_stdev = sd(transactionamount, na.rm=T)
)

ggplotly(
price2 %>% filter (count >= 30) %>% 
ggplot(aes(contractyear , price_50, colour=location_2, group=location_2, fill=location_2)) +  geom_line(alpha=1, size=1) +
  xlab("") + ylab("가격 중앙값") +  
  theme_bw() +  ggtitle("거래가격 중앙값(만원 단위)") +  
  theme(plot.title = element_text(size=24,face="bold")) +
  theme(axis.text=element_text(size=14),  axis.title=element_text(size=24,face="bold")) +  
  theme(strip.text.y = element_text(colour = "black", face='bold', angle = 45, size = 20,
                                    hjust = 0.5, vjust = 0.5)) +
  theme(legend.text = element_text(colour="black", size = 7)))

2단계 탐색적 자료 분석

2-1단계 시계열 분석 (4)

  • 전용 면적 section 별 거래 매매가 중앙값 확인
exclusivearea_price <- DF_all %>% filter(location_1 == '서울특별시') %>%
    group_by(contractyear , exclusivearea_section) %>%
    summarise(
        count = n(),
        price_mean = mean(transactionamount, na.rm=T),
        price_25 = quantile(transactionamount, probs= 0.25, na.rm=T),
        price_50 = quantile(transactionamount, probs= 0.50, na.rm=T),
        price_75 = quantile(transactionamount, probs= 0.75, na.rm=T),
        price_stdev = sd(transactionamount, na.rm=T)
)

ggplotly(
exclusivearea_price %>% filter (count >= 50) %>% 
ggplot(aes(contractyear , price_50, colour=exclusivearea_section, group=exclusivearea_section, fill=exclusivearea_section)) +  geom_line(alpha=1, size=1) +
  xlab("") + ylab("가격 중앙값") +  
  theme_bw() +  ggtitle("거래가격 중앙값(만원 단위)") +  
  theme(plot.title = element_text(size=24,face="bold")) +
  theme(axis.text=element_text(size=14),  axis.title=element_text(size=24,face="bold")) +  
  theme(strip.text.y = element_text(colour = "black", face='bold', angle = 45, size = 20,
                                    hjust = 0.5, vjust = 0.5)) +
  theme(legend.text = element_text(colour="black", size = 7)))

2단계 탐색적 자료 분석

2-1단계 시계열 분석 (5)

  • 특정 지역 전용 면적 section 별 거래 매매가 중앙값 확인
exclusivearea_location2_price <- DF_all %>% filter(location_1 == '서울특별시') %>%
    group_by(contractyear , exclusivearea_section, location_2) %>%
    summarise(
        count = n(),
        price_mean = mean(transactionamount, na.rm=T),
        price_25 = quantile(transactionamount, probs= 0.25, na.rm=T),
        price_50 = quantile(transactionamount, probs= 0.50, na.rm=T),
        price_75 = quantile(transactionamount, probs= 0.75, na.rm=T),
        price_stdev = sd(transactionamount, na.rm=T)
)

select_location <- c('강남구', '서초구', '종로구', '영등포구', '강서구', '마포구', '관악구', '강북구')
select_section <- c('0~66', '67~100')

ggplotly(
exclusivearea_location2_price %>% filter (count >= 30 & location_2 %in% select_location & exclusivearea_section %in% select_section) %>% 
ggplot(aes(contractyear , price_50, colour=location_2, group=location_2, fill=location_2)) +  geom_line(alpha=1, size=1) +
  xlab("") + ylab("가격 중앙값") +  
  theme_bw() +  ggtitle("거래가격 중앙값(만원 단위)") +  
  theme(plot.title = element_text(size=24,face="bold")) + facet_grid(.~exclusivearea_section) +
  theme(axis.text=element_text(size=14),  axis.title=element_text(size=24,face="bold")) +  
  theme(strip.text.y = element_text(colour = "black", face='bold', angle = 45, size = 20,
                                    hjust = 0.5, vjust = 0.5)) +
  theme(legend.text = element_text(colour="black", size = 7))) 

2단계 탐색적 자료 분석

2-2단계 맵지도 분석 (1)

  • 구글 맵 사용, 좌표는 시/구/동 기준으로 구글 맵에서 검색하며 찾음
  • 검색이 데스크탑에서는 데이터 확보가 느려서 1천개만 샘플링
  • 여기에서는 load 함수로 계산된 좌표 값 로딩
#1천개 샘플링, 주소를 좌표계로 변환하는 과정이 개별 검색해서 좌표 값 얻어내므로 느림

#DF_all2 <- DF_all[1:1000,]
#DF_address <- as.data.frame(DF_all2[,'municipality'])
#colnames(DF_address) <- 'municipality'
#DF_address$municipality <- enc2utf8(as.character(DF_address$municipality))
#DF_address2 <- mutate_geocode(DF_address, municipality, source = 'google')
#DF_all2 <- DF_all2 %>% mutate(lon = DF_address2$lon, lat = DF_address2$lat)
#save(DF_all2, file='DF_all2.Rdata')
#write.csv(DF_all2, file='DF_all2.csv', row.names=FALSE)
setwd("C:/budong/data/2017")
load('DF_all2.Rdata')
seoul_lonlat = unlist(geocode('seoul', source='google'))
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=seoul&sensor=false
head(DF_all2)
##                     시군구  번지                   단지명 전용면적 계약일
## 1 서울특별시 강남구 개포동 658-1 개포6차우성아파트1동~8동    54.98  21~31
## 2 서울특별시 강남구 개포동   141           개포주공 1단지    35.44  11~20
## 3 서울특별시 강남구 개포동   141           개포주공 1단지    35.64   1~10
## 4 서울특별시 강남구 개포동   141           개포주공 1단지    35.64  11~20
## 5 서울특별시 강남구 개포동   141           개포주공 1단지    41.98  11~20
## 6 서울특별시 강남구 개포동   141           개포주공 1단지    41.98  11~20
##   거래금액 층 건축년도 도로명   계약년월 location_1 location_2 location_3
## 1    66000  3     1987 언주로 2010-01-21 서울특별시     강남구     개포동
## 2    75000  1     1982 개포로 2010-01-11 서울특별시     강남구     개포동
## 3    72400  5     1982 개포로 2010-01-01 서울특별시     강남구     개포동
## 4    75000  1     1982 개포로 2010-01-11 서울특별시     강남구     개포동
## 5    83000  1     1982 개포로 2010-01-11 서울특별시     강남구     개포동
## 6    85000  5     1982 개포로 2010-01-11 서울특별시     강남구     개포동
##   전용면적_구간      lon      lat
## 1          0~66 127.0609 37.47896
## 2          0~66 127.0609 37.47896
## 3          0~66 127.0609 37.47896
## 4          0~66 127.0609 37.47896
## 5          0~66 127.0609 37.47896
## 6          0~66 127.0609 37.47896
colnames(DF_all2) <- c('municipality', 'address', 'complex', 'exclusivearea', 'contractdate', 'transactionamount', 'floors', 'construction', 'roadname', 'contractyear', 'location_1', 'location_2', 'location_3', 'exclusivearea_section', 'lon', 'lat')

2단계 탐색적 자료 분석

2-2단계 맵지도 분석 (2)

  • 1천개 샘플 데이터, 좌표 기반 매매가 가격(라벨)
leaflet(DF_all2) %>%
    setView(lng = seoul_lonlat[1],
          lat = seoul_lonlat[2],
              zoom = 11) %>%
    addTiles() %>%
    addMarkers(lng = ~lon, lat = ~lat, popup = ~as.character(transactionamount), label = ~as.character(transactionamount)) %>%
      addProviderTiles(providers$OpenStreetMap)
## Warning in validateCoords(lng, lat, funcName): Data contains 19 rows with
## either missing or invalid lat/lon values and will be ignored
## Input to asJSON(keep_vec_names=TRUE) is a named vector. In a future version of jsonlite, this option will not be supported, and named vectors will be translated into arrays instead of objects. If you want JSON object output, please use a named list instead. See ?toJSON.

2단계 탐색적 자료 분석

2-2단계 맵지도 분석 (3)

  • 1천개 샘플 데이터, 좌표별 카운트 분포
DF_all3 <- DF_all2 %>% group_by(lon , lat) %>%
    summarise(
        count = n(),
        exclusivearea_section_0_66 = sum(ifelse(exclusivearea_section == '0~66', 1, 0)),
        exclusivearea_section_67_100 = sum(ifelse(exclusivearea_section == '67~100', 1, 0)),
        exclusivearea_section_100_131 = sum(ifelse(exclusivearea_section == '100~131', 1, 0)),
        exclusivearea_section_132_165 = sum(ifelse(exclusivearea_section == '132~165', 1, 0)),
        price_25 = quantile(transactionamount, probs= 0.25, na.rm=T),
        price_50 = quantile(transactionamount, probs= 0.50, na.rm=T),
        price_75 = quantile(transactionamount, probs= 0.75, na.rm=T),
        price_stdev = sd(transactionamount, na.rm=T)
)
DF_all3
## # A tibble: 31 x 11
## # Groups:   lon [?]
##         lon      lat count exclusivearea_section_0_66
##       <dbl>    <dbl> <int>                      <dbl>
##  1 126.8104 37.55895     1                          0
##  2 126.8139 37.57646    36                         26
##  3 126.8307 37.57649     7                          4
##  4 126.8342 37.56483    48                         37
##  5 126.8346 37.55464    14                          2
##  6 126.8533 37.55865    30                         19
##  7 126.8718 37.55403    22                         11
##  8 127.0089 37.63873     5                          1
##  9 127.0229 37.52412     4                          2
## 10 127.0269 37.62683    27                         15
## # ... with 21 more rows, and 7 more variables:
## #   exclusivearea_section_67_100 <dbl>,
## #   exclusivearea_section_100_131 <dbl>,
## #   exclusivearea_section_132_165 <dbl>, price_25 <dbl>, price_50 <dbl>,
## #   price_75 <dbl>, price_stdev <dbl>
pal <- colorNumeric(c("red", "green", "blue", "yellow"), 1:10)


basemap <- leaflet(width = "100%", height = "400px") %>%
  setView(lng = seoul_lonlat[1],
          lat = seoul_lonlat[2],
              zoom = 11) %>%
  addTiles() %>%
  addProviderTiles(providers$OpenStreetMap)

basemap %>%
  addMinicharts(
    DF_all3$lon, DF_all3$lat,
    chartdata = DF_all3$count,
    showLabels = TRUE,
    width = 45
  )
## Input to asJSON(keep_vec_names=TRUE) is a named vector. In a future version of jsonlite, this option will not be supported, and named vectors will be translated into arrays instead of objects. If you want JSON object output, please use a named list instead. See ?toJSON.

2단계 탐색적 자료 분석

2-2단계 맵지도 분석 (4)

  • 1천개 샘플 데이터, 좌표별 exclusivearea 파이 차트
pal <- colorNumeric(c("red", "green", "blue", "yellow"), 1:10)

basemap %>%
  addMinicharts(
    DF_all3$lon, DF_all3$lat,
    type = "pie",
    chartdata = DF_all3[, c("exclusivearea_section_0_66", "exclusivearea_section_67_100", "exclusivearea_section_100_131", "exclusivearea_section_132_165")], 
    colorPalette = pal(c(1,6,9,10)), 
    width = 45
  )
## Input to asJSON(keep_vec_names=TRUE) is a named vector. In a future version of jsonlite, this option will not be supported, and named vectors will be translated into arrays instead of objects. If you want JSON object output, please use a named list instead. See ?toJSON.

2단계 탐색적 자료 분석

2-2단계 맵지도 분석 (6)

  • 1천개 샘플 데이터, 좌표/exclusivearea별 매매가 가격 중앙값 비교
pal <- colorNumeric(c("red", "green", "blue", "yellow"), 1:10)

DF_all4 <- DF_all2 %>% group_by(lon , lat, exclusivearea_section) %>%
    summarise(price_50 = quantile(transactionamount, probs= 0.50, na.rm=T)) 

DF_all4 <- DF_all4 %>% spread(exclusivearea_section,price_50)
DF_all4[is.na(DF_all4)] <- 0

basemap %>%
  addMinicharts(
    DF_all4$lon, DF_all4$lat,
    chartdata = DF_all4[, c("0~66","100~131","132~165","165~197","67~100")],
    colorPalette = pal(c(1,6,9,10)),
    width = 45
  )
## Input to asJSON(keep_vec_names=TRUE) is a named vector. In a future version of jsonlite, this option will not be supported, and named vectors will be translated into arrays instead of objects. If you want JSON object output, please use a named list instead. See ?toJSON.